R setup
require("knitr")
opts_knit$set(root.dir = '~/Documents/PhD/Thesis/quantseq_dataAnalysis/deseq2_dataAnalysis_2024/results/results_4wpc')require("knitr")
opts_knit$set(root.dir = '~/Documents/PhD/Thesis/quantseq_dataAnalysis/deseq2_dataAnalysis_2024/results/results_4wpc')
#| echo: false
p {
text-align: justify
}
figcaption {
text-align: center;
}
h1 {
text-align: center;
}
h2 {
text-align: center;
}
h3 {
text-align: center;
}
h4 {
text-align: center;
}
div {
text-indent: 50px;
}
/* Add space between figures and text */
<!-- figure { -->
<!-- margin-bottom: 60px; /* Adjust the value as needed */ -->
<!-- margin-top: 60px; -->
<!-- } -->suppressPackageStartupMessages({
library('tidyverse')
library('VennDiagram')
library('clusterProfiler')
library('gprofiler2')
library('org.Hs.eg.db')
library('enrichplot')
})
## Loading functions ----
source(
'~/Documents/PhD/Thesis/quantseq_dataAnalysis/deseq2_dataAnalysis_2024/github_repo/scripts/functions_data-wrangling_march24.R'
)
immune_related_GOterms <-
read.table(
'~/Documents/PhD/Thesis/quantseq_dataAnalysis/deseq2_dataAnalysis_2024/github_repo/goTerm_lists/immune_related_GOterms.tsv',
header = T,
sep = '\t'
) # loading dataframe containing immune related GO terms
## Loading results ----
setwd(
'~/Documents/PhD/Thesis/quantseq_dataAnalysis/deseq2_dataAnalysis_2024/results/results_4wpc'
)
results_files <-
list.files(pattern = 'res_.*') # regex matching results files
list.data <- list()
for (i in 1:length(results_files)) {
list.data[[i]] <- load(results_files[i])
}To create this document, raw sequencing data was:
Demultiplexed
Trimmed
Mapped to Ssal_v3.1, which is the current reference assembly for Atlantic salmon (released in 2021)
Run through a feature count, which assigns sequencing reads to the mapped genes
Gene counts were modelled using DESeq2’s negative binomial distribution
Results tables were extracted based on the desired contrasts/comparisons, as displayed in the example below:
res_ivld_vs_conu_4wpc <- results(ddsGroup_ensembl, contrast=c("group","ivld.4wpc","conu.4wpc"))
res_ivhd_vs_conu_4wpc <- results(ddsGroup_ensembl, contrast=c("group","ivhd.4wpc","conu.4wpc"))
res_gata3_vs_conu_4wpc <- results(ddsGroup_ensembl, contrast=c("group","gata3.4wpc","conu.4wpc"))
res_eomes_vs_conu_4wpc <- results(ddsGroup_ensembl, contrast=c("group","eomes.4wpc","conu.4wpc"))
res_dnavaccine_vs_conu_4wpc <- results(ddsGroup_ensembl, contrast=c("group","dnavaccine.4wpc","conu.4wpc"))## For loop, significant genes ----
results_files <-
ls(pattern = '^res_.*conu.4wpc') # listing Global Environment files matching regex pattern, to be used in the for loop. In this case, starting with res, containing conu, and 1wpc
# Create an empty list to store results
all_significant_genes <- list()
# Assuming results_files is a list of object names
for (name in results_files) {
# Remove the file extension ".RData" if present
name <- sub(".RData$", "", name)
# Retrieve the object from the global environment
results <- get(name, envir = .GlobalEnv)
# Call the significant_genes function and store the result
result_genes <- significant_genes(results)
# Store the result in the list
all_significant_genes[[name]] <- result_genes
}
### Renaming dataframes ----
names(all_significant_genes) <-
c('DNA vaccine',
'EOMES',
'GATA3',
'IV-HD',
'IV-LD')
### Delisting dataframes ----
list2env(all_significant_genes, envir = .GlobalEnv) # splitting the dataframes to summarize their contents
### Summarizing differentially expressed genes ----
deg_regulation_summary <-
lapply(mget(c(
'DNA vaccine',
'EOMES',
'GATA3',
'IV-HD',
'IV-LD'
)), sig_genes_metrics)
### Transforming summary into a tibble ----
deg_regulation_summary <-
as_tibble(bind_rows(deg_regulation_summary, .id = 'Treatment'))
### Reordering factors to plot ----
deg_regulation_summary$Treatment <-
factor(
deg_regulation_summary$Treatment,
levels = c('IV-LD',
'IV-HD',
'DNA vaccine',
'EOMES',
'GATA3')
)
results_files <-
list.files(pattern = '^res_.*_conu_4wpc') # regex matching results files
list.data <- list()
for (i in 1:length(results_files)) {
list.data[[i]] <- load(results_files[i])
}
# List of objects containing all results files matching the regex pattern
objects <- ls(pattern = "^res_.*_conu_4wpc")
# List of treatments
treatments <- c("dnavaccine", "eomes", "gata3", "ivhd", "ivld")
# Initialize an empty list to store data frames
result_list <- list()
# Loop through each treatment and apply the function, also adding a column with treatment information
for (treatment in treatments) {
obj <- paste0("res_", treatment, "_vs_conu_4wpc")
result_list[[treatment]] <-
improved_data_wrangling(get(obj), treatment = treatment, sampling_point = "4wpc")
}
rm(i,
obj,
objects,
ora_down,
ora_up,
results_files,
treatment,
treatments)
# Vertically merge data frames from all treatments and remove NA
merged_df <- do.call(rbind, result_list) %>% na.omit()
# Reduce and intersect the ID column in result_list to find common differentially regulated genes among all treatments
reduced_intersected_ids <-
Reduce(intersect, lapply(result_list, function(df)
df$ID))
# Print the reduced and intersected IDs
print(reduced_intersected_ids)
# Find the common IDs between reduced_intersected_ids and merged_df. This way we are keeping only the common regulated salmon genes
common_ids <- intersect(merged_df$ID, reduced_intersected_ids)
# Subset merged_df to keep only rows with common IDs
# merged_subset <- merged_df[merged_df$ID %in% common_ids,]
merged_subset <-
subset(merged_df, ID %in% common_ids) # tidy version
merged_df[duplicated(merged_df$ortholog_name),] %>% arrange(ortholog_name) %>% print(n = 100) # identifying duplicates in merged_df
# Splitting between up and downregulated genes, and sorting by gene name and adjusted p-value
upregulated_gsea <-
merged_subset %>% dplyr::filter(log2FC > 0) %>% arrange(ortholog_name, adjusted_p.val)
downregulated_gsea <-
merged_subset %>% dplyr::filter(log2FC < 0) %>% arrange(ortholog_name, adjusted_p.val)
# Keeping just one of the ortholog copies, the one with the lowest adjusted p-value
unique_upregulated <-
upregulated_gsea[!duplicated(upregulated_gsea$ortholog_ensg),] %>% arrange(ortholog_name)
unique_downregulated <-
downregulated_gsea[!duplicated(downregulated_gsea$ortholog_ensg),] %>% arrange(ortholog_name)
# Merging down and upregulated dataframes after removing duplicates
enrichment_unique_common <-
rbind(unique_downregulated, unique_upregulated) %>% dplyr::arrange(desc(log2FC))
# Arranging matrix for GSEA
enrichment_gsea_common <- enrichment_unique_common$log2FC
names(enrichment_gsea_common) <-
enrichment_unique_common$ortholog_ensgdeg_regulation_summary %>%
ggplot(aes(x = Treatment, y = n, fill = Regulation)) +
geom_bar(
stat = 'identity',
position = 'dodge',
colour = 'black',
linewidth = .2
) +
scale_fill_manual(values = c('#BAFFC9',
'#FFDFBA')) +
ylab('Number of genes') +
geom_text(
aes(label = n),
position = position_dodge(width = 0.9),
vjust = -0.5,
size = 4
) +
# ggtitle(label = 'Differentially Expressed Genes',
# subtitle = 'Cardiac tissue at 1 WPC, CONU as reference') +
theme_light() +
theme(panel.grid = element_blank()) +
theme(text = element_text(size = 14, family = 'serif')) +
theme(axis.text.x = element_text(angle = 0, vjust = 0.5)) +
geom_vline(
xintercept = c(1.5, 2.5, 3.5, 4.5, 6.5, 7.5, 8.5),
linetype = 'dotted',
linewidth = 0.2
) +
geom_hline(yintercept = 0, size = 0.2)### For loop, significant genes ----
results_files <-
ls(pattern = '^res_.*(ptag|ivld).4wpc') # listing Global Environment files matching regex pattern, to be used in the for loop. In this case, starting with res, and containing conu or ivld, and 4wpc
# Create an empty list to store results
all_significant_genes <- list()
# Assuming results_files is a list of object names
for (name in results_files) {
# Remove the file extension ".RData" if present
name <- sub(".RData$", "", name)
# Retrieve the object from the global environment
results <- get(name, envir = .GlobalEnv)
# Call the significant_genes function and store the result
result_genes <- significant_genes(results)
# Store the result in the list
all_significant_genes[[name]] <- result_genes
}
### Renaming dataframes ----
names(all_significant_genes) <-
c(
'DNA vaccine vs IV-LD',
'DNA vaccine vs pTagRFP',
'EOMES vs IV-LD',
'EOMES vs pTagRFP',
'GATA3 vs IV-LD',
'GATA3 vs pTagRFP',
'IV-HD vs IV-LD',
'IV-HD vs pTagRFP',
'IV-LD vs pTagRFP'
)
### Delisting dataframes ----
list2env(all_significant_genes, envir = .GlobalEnv) # splitting the dataframes to summarize their contents
### Summarizing differentially expressed genes ----
deg_regulation_summary <-
lapply(mget(
c(
'DNA vaccine vs IV-LD',
'DNA vaccine vs pTagRFP',
'EOMES vs IV-LD',
'EOMES vs pTagRFP',
'GATA3 vs IV-LD',
'GATA3 vs pTagRFP',
'IV-HD vs IV-LD',
'IV-HD vs pTagRFP',
'IV-LD vs pTagRFP'
)
), sig_genes_metrics)
### Transforming summary into a tibble ----
deg_regulation_summary <-
as_tibble(bind_rows(deg_regulation_summary, .id = 'Contrast'))
deg_regulation_summary <-
deg_regulation_summary %>% add_row(Contrast = 'DNA vaccine vs IV-LD',
Regulation = 'downregulated',
n = 0)
### Reordering factors to plot ----
deg_regulation_summary$Contrast <-
factor(
deg_regulation_summary$Contrast,
levels = c(
'IV-LD vs pTagRFP',
'IV-HD vs pTagRFP',
'DNA vaccine vs pTagRFP',
'EOMES vs pTagRFP',
'GATA3 vs pTagRFP',
'IV-HD vs IV-LD',
'DNA vaccine vs IV-LD',
'EOMES vs IV-LD',
'GATA3 vs IV-LD'
)
)deg_regulation_summary %>%
ggplot(aes(x = Contrast, y = n, fill = Regulation)) +
geom_bar(
stat = 'identity',
position = 'dodge',
colour = 'black',
linewidth = .2
) +
scale_fill_brewer(palette = 'Dark2') +
ylab('Number of genes') +
geom_text(
aes(label = n),
position = position_dodge(width = 0.9),
vjust = -0.5,
size = 4
) +
# ggtitle(label = 'Differentially Expressed Genes',
# subtitle = 'Cardiac tissue at 4 WPC') +
theme_light() +
theme(panel.grid = element_blank()) +
theme(text = element_text(size = 14, family = 'serif')) +
theme(axis.text.x = element_text(angle = 270, vjust = 0.5)) +
geom_vline(
xintercept = c(1.5, 2.5, 3.5, 4.5, 6.5, 7.5, 8.5),
linetype = 'dotted',
linewidth = 0.2
) +
geom_vline(xintercept = 5.5,
linetype = 'solid',
linewidth = 0.3) +
geom_hline(yintercept = 0, size = 0.2) +
annotate(
'text',
x = 3,
y = 550,
label = 'vs pTagRFP',
size = 4,
family = 'serif'
) +
annotate(
'text',
x = 7.5,
y = 550,
label = 'vs IV-LD',
size = 4,
family = 'serif'
)
# Get names of all objects in global environment with a regex pattern matching 'res_.*_conu_4wpc'
res_objects <- ls(pattern = "res_.*_conu_4wpc")
# Apply significant_genes function to each res object
for (res_object in res_objects) {
# Get the object from the global environment
result <- get(res_object)
# Apply significant_genes function
significant_result <- significant_genes(result)
# Assign the result back to the global environment
assign(paste0("significant_", res_object), significant_result, envir = .GlobalEnv)
}
rm(list.data, result, i, res_object, res_objects, results_files, significant_result)The results tables used in these Venn diagrams and ORA were extracted using CONU as reference.
# create list of downregulated geneIDs
b <- list(
A = significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC < 0, ]$ID,
B = significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC < 0, ]$ID,
C = significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC < 0, ]$ID,
D = significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC < 0, ]$ID,
E = significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC < 0, ]$ID
)
# add treatment names
names(b) <-
c('DNA vaccine', 'EOMES', 'GATA3', 'IVHD', 'IVLD')
# check gene counts per treatment
kable((sapply(b, length)), col.names = c('count'))| count | |
|---|---|
| DNA vaccine | 858 |
| EOMES | 1353 |
| GATA3 | 1429 |
| IVHD | 608 |
| IVLD | 250 |
venn1 <- display_venn(
b,
fill = c('#cdb4db', '#bde0fe', '#ccd5ae', '#d4a373', '#f08080'),
lwd = 1,
cex = 1,
cat.cex = 1,
cat.fontfamily = 'serif',
# cat.fontface = 'bold',
cat.default.pos = 'outer',
cat.dist = c(0.20, 0.20, 0.22, 0.20,.20),
cat.pos = c(360, 360, 250, 90, 360)
)From the Venn diagram, I gathered the genes that are common to all treatments, and converted them to h. sapiens orthologs. They are listed in Table 1.
common_genes_downregulated_4wpc <-
Reduce(
intersect,
list(
significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC < 0,]$ID,
significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC < 0,]$ID,
significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC < 0,]$ID,
significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC < 0,]$ID,
significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC < 0,]$ID
)
)
common_downregulated_orthologs_4wpc <- gorth(
query = common_genes_downregulated_4wpc,
source_organism = 'ssalar',
target_organism = 'hsapiens',
mthreshold = 1,
filter_na = F
)
common_downregulated_orthologs_tbl_4wpc <-
as_tibble(common_downregulated_orthologs_4wpc) %>% dplyr::select(.,
input,
ortholog_name,
ortholog_ensg,
description) %>% dplyr::rename(
.,
ssalar_ensembl = input,
hsapiens_ortholog = ortholog_name,
hsapiens_ensembl = ortholog_ensg,
description = description
)
common_downregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>%
kable(
booktabs = TRUE,
col.names = c(
'Salmon ENSEMBL',
'Human ortholog',
'Human ENSEMBL',
'Description'
),
align = 'c') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| Salmon ENSEMBL | Human ortholog | Human ENSEMBL | Description |
|---|---|---|---|
| ENSSSAG00000056136 | COPB1 | ENSG00000129083 | COPI coat complex subunit beta 1 [Source:HGNC Symbol;Acc:HGNC:2231] |
| ENSSSAG00000008058 | CHMP1B | ENSG00000255112 | charged multivesicular body protein 1B [Source:HGNC Symbol;Acc:HGNC:24287] |
| ENSSSAG00000042725 | ACTR1A | ENSG00000138107 | actin related protein 1A [Source:HGNC Symbol;Acc:HGNC:167] |
| ENSSSAG00000063331 | TLN1 | ENSG00000137076 | talin 1 [Source:HGNC Symbol;Acc:HGNC:11845] |
| ENSSSAG00000104939 | ATP6AP1 | ENSG00000071553 | ATPase H+ transporting accessory protein 1 [Source:HGNC Symbol;Acc:HGNC:868] |
| ENSSSAG00000078656 | PDCD6 | ENSG00000249915 | programmed cell death 6 [Source:HGNC Symbol;Acc:HGNC:8765] |
| ENSSSAG00000071607 | N/A | N/A | N/A |
| ENSSSAG00000082229 | ARHGDIA | ENSG00000141522 | Rho GDP dissociation inhibitor alpha [Source:HGNC Symbol;Acc:HGNC:678] |
| ENSSSAG00000040232 | ITGAV | ENSG00000138448 | integrin subunit alpha V [Source:HGNC Symbol;Acc:HGNC:6150] |
| ENSSSAG00000043967 | CD63 | ENSG00000135404 | CD63 molecule [Source:HGNC Symbol;Acc:HGNC:1692] |
| ENSSSAG00000005722 | MLKL | ENSG00000168404 | mixed lineage kinase domain like pseudokinase [Source:HGNC Symbol;Acc:HGNC:26617] |
| ENSSSAG00000052063 | PDLIM1 | ENSG00000107438 | PDZ and LIM domain 1 [Source:HGNC Symbol;Acc:HGNC:2067] |
| ENSSSAG00000046808 | P4HB | ENSG00000185624 | prolyl 4-hydroxylase subunit beta [Source:HGNC Symbol;Acc:HGNC:8548] |
| ENSSSAG00000043688 | TKTL2 | ENSG00000151005 | transketolase like 2 [Source:HGNC Symbol;Acc:HGNC:25313] |
| ENSSSAG00000042656 | DBNL | ENSG00000136279 | drebrin like [Source:HGNC Symbol;Acc:HGNC:2696] |
| ENSSSAG00000078094 | VRK2 | ENSG00000028116 | VRK serine/threonine kinase 2 [Source:HGNC Symbol;Acc:HGNC:12719] |
| ENSSSAG00000049114 | MCUB | ENSG00000005059 | mitochondrial calcium uniporter dominant negative subunit beta [Source:HGNC Symbol;Acc:HGNC:26076] |
| ENSSSAG00000057363 | MYO1C | ENSG00000197879 | myosin IC [Source:HGNC Symbol;Acc:HGNC:7597] |
| ENSSSAG00000115504 | RNF121 | ENSG00000137522 | ring finger protein 121 [Source:HGNC Symbol;Acc:HGNC:21070] |
| ENSSSAG00000046480 | PTP4A2 | ENSG00000184007 | protein tyrosine phosphatase 4A2 [Source:HGNC Symbol;Acc:HGNC:9635] |
From an input of 155 salmon ENSEMBL gene IDs, 124 genes were converted to human orthologs.
common_ora_downreg_4wpc <-
enrichGO(
gene = common_downregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
OrgDb = 'org.Hs.eg.db',
keyType = 'ENSEMBL',
ont = 'BP',
minGSSize = 10,
maxGSSize = 1000,
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
readable = T
)
# common_downregulated_orthologs_tbl_4wpc %>% dplyr::filter(!str_detect(hsapiens_ortholog, 'N/A')) %>% pull(., hsapiens_ortholog) %>% length()as_tibble(common_ora_downreg_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'ID' by Adjusted p-value
dplyr::slice(1:20) %>%
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Reds',
name = 'Adjusted \n p-value') +
# guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(common_ora_downreg_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'ID' by Adjusted p-value
dplyr::slice(1:20) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| GO Term | Description |
|---|---|
| GO:0030335 | positive regulation of cell migration |
| GO:2001233 | regulation of apoptotic signaling pathway |
| GO:0043030 | regulation of macrophage activation |
| GO:0019221 | cytokine-mediated signaling pathway |
| GO:0002274 | myeloid leukocyte activation |
| GO:0045058 | T cell selection |
| GO:0097066 | response to thyroid hormone |
| GO:0002263 | cell activation involved in immune response |
| GO:0002366 | leukocyte activation involved in immune response |
| GO:0007015 | actin filament organization |
| GO:0030100 | regulation of endocytosis |
| GO:0033627 | cell adhesion mediated by integrin |
| GO:0030029 | actin filament-based process |
| GO:0071402 | cellular response to lipoprotein particle stimulus |
| GO:0050764 | regulation of phagocytosis |
| GO:0006909 | phagocytosis |
| GO:0030036 | actin cytoskeleton organization |
| GO:0097067 | cellular response to thyroid hormone stimulus |
| GO:0006897 | endocytosis |
| GO:0007229 | integrin-mediated signaling pathway |
downregulated_immune_common_4wpc <-
filter_rows_by_GO_term(common_ora_downreg_4wpc, immune_related_GOterms, 'goterms')
as_tibble(downregulated_immune_common_4wpc) %>%
dplyr::slice(1:10) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(
stat = "identity",
colour = 'black',
linewidth = 0.3,
width = 0.8,
show.legend = TRUE
) +
coord_flip() +
scale_fill_distiller(palette = 'Purples',
name = 'Adjusted \n p-value') +
# guide = guide_colorbar(reverse = FALSE)) +
scale_y_continuous() +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(downregulated_immune_common_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'ID' by Adjusted p-value
dplyr::slice(1:10) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c('GO Term',
'Description'),
align = 'l'
) %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| GO Term | Description |
|---|---|
| GO:0019221 | cytokine-mediated signaling pathway |
| GO:0002274 | myeloid leukocyte activation |
| GO:0045058 | T cell selection |
| GO:0097066 | response to thyroid hormone |
| GO:0002263 | cell activation involved in immune response |
| GO:0002366 | leukocyte activation involved in immune response |
| GO:0071402 | cellular response to lipoprotein particle stimulus |
| GO:0050764 | regulation of phagocytosis |
| GO:0006909 | phagocytosis |
| GO:0097067 | cellular response to thyroid hormone stimulus |
Because there were five groups with more than 100 genes in the Venn diagram on Figure 3, I analyzed them all separately:
EOMES exclusive (382 genes)
GATA3 exclusive (456 genes)
GATA3-EOMES intersection (249 genes)
GATA3-EOMES-DNA vaccine intersection (148 genes)
GATA3-EOMES-DNA vaccine-IVHD intersection (176 genes)
dnavaccine_4wpc_down <-
significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC < 0,]$ID
eomes_4wpc_down <-
significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC < 0,]$ID
ivhd_4wpc_down <-
significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC < 0, ]$ID
gata3_4wpc_down <-
significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC < 0, ]$ID
ivld_4wpc_down <-
significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC < 0, ]$ID
gata3_exclusive_down_genes_4wpc <-
setdiff(gata3_4wpc_down,
c(dnavaccine_4wpc_down, eomes_4wpc_down, ivhd_4wpc_down, ivld_4wpc_down))
gata3_downregulated_orthologs_4wpc <- gorth(
query = gata3_exclusive_down_genes_4wpc,
source_organism = 'ssalar',
target_organism = 'hsapiens',
mthreshold = 1,
filter_na = F
)
gata3_downregulated_orthologs_tbl_4wpc <-
as_tibble(gata3_downregulated_orthologs_4wpc) %>% dplyr::select(.,
input,
ortholog_name,
ortholog_ensg,
description) %>% dplyr::rename(
.,
ssalar_ensembl = input,
hsapiens_ortholog = ortholog_name,
hsapiens_ensembl = ortholog_ensg,
description = description
)
gata3_downregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>%
kable(
booktabs = TRUE,
col.names = c(
'Salmon ENSEMBL',
'Human ortholog',
'Human ENSEMBL',
'Description'
),
align = 'c') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| Salmon ENSEMBL | Human ortholog | Human ENSEMBL | Description |
|---|---|---|---|
| ENSSSAG00000063564 | N/A | N/A | N/A |
| ENSSSAG00000049859 | ADD1 | ENSG00000087274 | adducin 1 [Source:HGNC Symbol;Acc:HGNC:243] |
| ENSSSAG00000002391 | KHDRBS1 | ENSG00000121774 | KH RNA binding domain containing, signal transduction associated 1 [Source:HGNC Symbol;Acc:HGNC:18116] |
| ENSSSAG00000088854 | N/A | N/A | N/A |
| ENSSSAG00000043692 | HNRNPAB | ENSG00000197451 | heterogeneous nuclear ribonucleoprotein A/B [Source:HGNC Symbol;Acc:HGNC:5034] |
| ENSSSAG00000107579 | N/A | N/A | N/A |
| ENSSSAG00000076252 | AP3B1 | ENSG00000132842 | adaptor related protein complex 3 subunit beta 1 [Source:HGNC Symbol;Acc:HGNC:566] |
| ENSSSAG00000005920 | ETF1 | ENSG00000120705 | eukaryotic translation termination factor 1 [Source:HGNC Symbol;Acc:HGNC:3477] |
| ENSSSAG00000076210 | N/A | N/A | N/A |
| ENSSSAG00000060562 | NUBP2 | ENSG00000095906 | NUBP iron-sulfur cluster assembly factor 2, cytosolic [Source:HGNC Symbol;Acc:HGNC:8042] |
| ENSSSAG00000077073 | KMT2C | ENSG00000055609 | lysine methyltransferase 2C [Source:HGNC Symbol;Acc:HGNC:13726] |
| ENSSSAG00000107241 | DHX15 | ENSG00000109606 | DEAH-box helicase 15 [Source:HGNC Symbol;Acc:HGNC:2738] |
| ENSSSAG00000081700 | DYNC1I2 | ENSG00000077380 | dynein cytoplasmic 1 intermediate chain 2 [Source:HGNC Symbol;Acc:HGNC:2964] |
| ENSSSAG00000066076 | MAMSTR | ENSG00000176909 | MEF2 activating motif and SAP domain containing transcriptional regulator [Source:HGNC Symbol;Acc:HGNC:26689] |
| ENSSSAG00000070289 | RBM39 | ENSG00000131051 | RNA binding motif protein 39 [Source:HGNC Symbol;Acc:HGNC:15923] |
| ENSSSAG00000005064 | CRELD2 | ENSG00000184164 | cysteine rich with EGF like domains 2 [Source:HGNC Symbol;Acc:HGNC:28150] |
| ENSSSAG00000068484 | COPB1 | ENSG00000129083 | COPI coat complex subunit beta 1 [Source:HGNC Symbol;Acc:HGNC:2231] |
| ENSSSAG00000075198 | LARS1 | ENSG00000133706 | leucyl-tRNA synthetase 1 [Source:HGNC Symbol;Acc:HGNC:6512] |
| ENSSSAG00000084004 | N/A | N/A | N/A |
| ENSSSAG00000105293 | N/A | N/A | N/A |
gata3_ora_downreg_4wpc <-
enrichGO(
gene = gata3_downregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
OrgDb = 'org.Hs.eg.db',
keyType = 'ENSEMBL',
ont = 'BP',
minGSSize = 10,
maxGSSize = 1000,
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
readable = T
)From an input of 454 salmon ENSEMBL gene IDs exclusive to GATA3, 341 genes were converted to human orthologs, and used for over-representation analysis (Figure 6). The terms enriched in Figure 6 were then filtered through the immune-related term list to plot Figure 7.
as_tibble(gata3_ora_downreg_4wpc) %>%
dplyr::slice(1:20) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'ID' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Reds',
name = 'Adjusted \n p-value') +
# guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(gata3_ora_downreg_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:20) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c('GO Term',
'Description'),
align = 'l'
) %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| GO Term | Description |
|---|---|
| GO:0006457 | protein folding |
| GO:1902904 | negative regulation of supramolecular fiber organization |
| GO:0018193 | peptidyl-amino acid modification |
| GO:0006888 | endoplasmic reticulum to Golgi vesicle-mediated transport |
| GO:0042149 | cellular response to glucose starvation |
| GO:0051494 | negative regulation of cytoskeleton organization |
| GO:0030098 | lymphocyte differentiation |
| GO:0010762 | regulation of fibroblast migration |
| GO:0048193 | Golgi vesicle transport |
| GO:0007169 | transmembrane receptor protein tyrosine kinase signaling pathway |
| GO:0031333 | negative regulation of protein-containing complex assembly |
| GO:0010761 | fibroblast migration |
| GO:0006983 | ER overload response |
| GO:0006886 | intracellular protein transport |
| GO:0030029 | actin filament-based process |
| GO:0007015 | actin filament organization |
| GO:0030036 | actin cytoskeleton organization |
| GO:0010639 | negative regulation of organelle organization |
| GO:0043254 | regulation of protein-containing complex assembly |
| GO:0051129 | negative regulation of cellular component organization |
downregulated_immune_gata3_4wpc <-
filter_rows_by_GO_term(gata3_ora_downreg_4wpc, immune_related_GOterms, 'goterms')
as_tibble(downregulated_immune_gata3_4wpc) %>%
dplyr::slice(1:10) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(
stat = "identity",
colour = 'black',
linewidth = 0.3,
width = 0.8,
show.legend = TRUE
) +
coord_flip() +
scale_fill_distiller(palette = 'Purples',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = FALSE)) +
scale_y_continuous() +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(downregulated_immune_gata3_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:10) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c('GO Term',
'Description'),
align = 'l'
) %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| GO Term | Description |
|---|---|
| GO:0032868 | response to insulin |
| GO:0010035 | response to inorganic substance |
| GO:0030217 | T cell differentiation |
| GO:0071496 | cellular response to external stimulus |
| GO:1903131 | mononuclear cell differentiation |
| GO:0002521 | leukocyte differentiation |
| GO:0009267 | cellular response to starvation |
| GO:0032869 | cellular response to insulin stimulus |
| GO:0030098 | lymphocyte differentiation |
| GO:0006983 | ER overload response |
eomes_exclusive_down_genes_4wpc <-
setdiff(eomes_4wpc_down,
c(dnavaccine_4wpc_down, ivhd_4wpc_down, gata3_4wpc_down, ivld_4wpc_down))
eomes_down_orthologs_4wpc <- gorth(
query = eomes_exclusive_down_genes_4wpc,
source_organism = 'ssalar',
target_organism = 'hsapiens',
mthreshold = 1,
filter_na = F
)
eomes_down_orthologs_tbl_4wpc <-
as_tibble(eomes_down_orthologs_4wpc) %>% dplyr::select(.,
input,
ortholog_name,
ortholog_ensg,
description) %>% dplyr::rename(
.,
ssalar_ensembl = input,
hsapiens_ortholog = ortholog_name,
hsapiens_ensembl = ortholog_ensg,
description = description
)
eomes_down_orthologs_tbl_4wpc %>% head(., n = 20) %>%
kable(
booktabs = TRUE,
col.names = c(
'Salmon ENSEMBL',
'Human ortholog',
'Human ENSEMBL',
'Description'
),
align = 'c') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| Salmon ENSEMBL | Human ortholog | Human ENSEMBL | Description |
|---|---|---|---|
| ENSSSAG00000094153 | N/A | N/A | N/A |
| ENSSSAG00000082595 | N/A | N/A | N/A |
| ENSSSAG00000007854 | ATP6AP2 | ENSG00000182220 | ATPase H+ transporting accessory protein 2 [Source:HGNC Symbol;Acc:HGNC:18305] |
| ENSSSAG00000107744 | SFXN3 | ENSG00000107819 | sideroflexin 3 [Source:HGNC Symbol;Acc:HGNC:16087] |
| ENSSSAG00000002903 | ADAM33 | ENSG00000149451 | ADAM metallopeptidase domain 33 [Source:HGNC Symbol;Acc:HGNC:15478] |
| ENSSSAG00000104948 | N/A | N/A | N/A |
| ENSSSAG00000099571 | N/A | N/A | N/A |
| ENSSSAG00000063098 | YWHAQ | ENSG00000134308 | tyrosine 3-monooxygenase/tryptophan 5-monooxygenase activation protein theta [Source:HGNC Symbol;Acc:HGNC:12854] |
| ENSSSAG00000005645 | N/A | N/A | N/A |
| ENSSSAG00000075127 | AP2M1 | ENSG00000161203 | adaptor related protein complex 2 subunit mu 1 [Source:HGNC Symbol;Acc:HGNC:564] |
| ENSSSAG00000056674 | N/A | N/A | N/A |
| ENSSSAG00000066201 | GLUD1 | ENSG00000148672 | glutamate dehydrogenase 1 [Source:HGNC Symbol;Acc:HGNC:4335] |
| ENSSSAG00000119962 | N/A | N/A | N/A |
| ENSSSAG00000041622 | ZYX | ENSG00000159840 | zyxin [Source:HGNC Symbol;Acc:HGNC:13200] |
| ENSSSAG00000089095 | CLTA | ENSG00000122705 | clathrin light chain A [Source:HGNC Symbol;Acc:HGNC:2090] |
| ENSSSAG00000114855 | N/A | N/A | N/A |
| ENSSSAG00000010060 | STAT3 | ENSG00000168610 | signal transducer and activator of transcription 3 [Source:HGNC Symbol;Acc:HGNC:11364] |
| ENSSSAG00000008813 | IPO5 | ENSG00000065150 | importin 5 [Source:HGNC Symbol;Acc:HGNC:6402] |
| ENSSSAG00000067990 | PSMC4 | ENSG00000013275 | proteasome 26S subunit, ATPase 4 [Source:HGNC Symbol;Acc:HGNC:9551] |
| ENSSSAG00000112907 | RBM8A | ENSG00000265241 | RNA binding motif protein 8A [Source:HGNC Symbol;Acc:HGNC:9905] |
eomes_ora_downreg_4wpc <-
enrichGO(
gene = eomes_down_orthologs_tbl_4wpc$hsapiens_ensembl,
OrgDb = 'org.Hs.eg.db',
keyType = 'ENSEMBL',
ont = 'BP',
minGSSize = 10,
maxGSSize = 1000,
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
readable = T
)From an input of 382 salmon ENSEMBL gene IDs exclusive to EOMES, 278 genes were converted to human orthologs.
These 278 genes were used for over-representation analysis Figure 8, and filtered by immune-related term in Figure 9.
as_tibble(eomes_ora_downreg_4wpc) %>%
dplyr::slice(1:20) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'Description' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Reds',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(eomes_ora_downreg_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:20) %>%
map_df(., rev) %>%
dplyr::select(ID, Description) %>%
kable(
booktabs = TRUE,
col.names = c('GO Term',
'Description'),
align = 'l'
) %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| GO Term | Description |
|---|---|
| GO:0030029 | actin filament-based process |
| GO:1903050 | regulation of proteolysis involved in protein catabolic process |
| GO:0071453 | cellular response to oxygen levels |
| GO:0019752 | carboxylic acid metabolic process |
| GO:0030162 | regulation of proteolysis |
| GO:0007015 | actin filament organization |
| GO:0060249 | anatomical structure homeostasis |
| GO:0001894 | tissue homeostasis |
| GO:2001242 | regulation of intrinsic apoptotic signaling pathway |
| GO:0048871 | multicellular organismal-level homeostasis |
| GO:0071604 | transforming growth factor beta production |
| GO:0006984 | ER-nucleus signaling pathway |
| GO:0006886 | intracellular protein transport |
| GO:0034976 | response to endoplasmic reticulum stress |
| GO:0010498 | proteasomal protein catabolic process |
| GO:0071634 | regulation of transforming growth factor beta production |
| GO:0006511 | ubiquitin-dependent protein catabolic process |
| GO:0043632 | modification-dependent macromolecule catabolic process |
| GO:0019941 | modification-dependent protein catabolic process |
| GO:0051603 | proteolysis involved in protein catabolic process |
downregulated_immune_eomes_4wpc <-
filter_rows_by_GO_term(eomes_ora_downreg_4wpc, immune_related_GOterms, 'goterms')
as_tibble(downregulated_immune_eomes_4wpc) %>%
dplyr::slice(1:10) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'Description' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Purples',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(downregulated_immune_eomes_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:10) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| GO Term | Description |
|---|---|
| GO:0002363 | alpha-beta T cell lineage commitment |
| GO:0010039 | response to iron ion |
| GO:0009611 | response to wounding |
| GO:2001243 | negative regulation of intrinsic apoptotic signaling pathway |
| GO:0071453 | cellular response to oxygen levels |
| GO:2001242 | regulation of intrinsic apoptotic signaling pathway |
intersection <- intersect(gata3_4wpc_down, eomes_4wpc_down)
# length(intersection)
gata3EOMES_exclusive_genes_down <- setdiff(intersection,
c(dnavaccine_4wpc_down, ivhd_4wpc_down, ivld_4wpc_down))
# length(gata3EOMES_exclusive_genes_down)
gata3EOMES_down_orthologs_4wpc <- gorth(
query = gata3EOMES_exclusive_genes_down,
source_organism = 'ssalar',
target_organism = 'hsapiens',
mthreshold = 1,
filter_na = F
)
# head(gata3EOMES_down_orthologs_4wpc)
# nrow(gata3EOMES_down_orthologs_4wpc)
gata3EOMES_intersection_down_orthologs_tbl_4wpc <-
as_tibble(gata3EOMES_down_orthologs_4wpc) %>% dplyr::select(.,
input,
ortholog_name,
ortholog_ensg,
description) %>% dplyr::rename(
.,
ssalar_ensembl = input,
hsapiens_ortholog = ortholog_name,
hsapiens_ensembl = ortholog_ensg,
description = description
)
gata3EOMES_intersection_down_orthologs_tbl_4wpc %>% head(., n = 20) %>%
kable(
booktabs = TRUE,
col.names = c(
'Salmon ENSEMBL',
'Human ortholog',
'Human ENSEMBL',
'Description'
),
align = 'c') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| Salmon ENSEMBL | Human ortholog | Human ENSEMBL | Description |
|---|---|---|---|
| ENSSSAG00000080819 | N/A | N/A | N/A |
| ENSSSAG00000088119 | DNAJA2 | ENSG00000069345 | DnaJ heat shock protein family (Hsp40) member A2 [Source:HGNC Symbol;Acc:HGNC:14884] |
| ENSSSAG00000041182 | ADRM1 | ENSG00000130706 | ADRM1 26S proteasome ubiquitin receptor [Source:HGNC Symbol;Acc:HGNC:15759] |
| ENSSSAG00000073163 | N/A | N/A | N/A |
| ENSSSAG00000063958 | PSMD12 | ENSG00000197170 | proteasome 26S subunit, non-ATPase 12 [Source:HGNC Symbol;Acc:HGNC:9557] |
| ENSSSAG00000104475 | CWC25 | ENSG00000273559 | CWC25 spliceosome associated protein homolog [Source:HGNC Symbol;Acc:HGNC:25989] |
| ENSSSAG00000101565 | FAM3C | ENSG00000196937 | FAM3 metabolism regulating signaling molecule C [Source:HGNC Symbol;Acc:HGNC:18664] |
| ENSSSAG00000043582 | PARP1 | ENSG00000143799 | poly(ADP-ribose) polymerase 1 [Source:HGNC Symbol;Acc:HGNC:270] |
| ENSSSAG00000069956 | PSMD6 | ENSG00000163636 | proteasome 26S subunit, non-ATPase 6 [Source:HGNC Symbol;Acc:HGNC:9564] |
| ENSSSAG00000077285 | MYO6 | ENSG00000196586 | myosin VI [Source:HGNC Symbol;Acc:HGNC:7605] |
| ENSSSAG00000054039 | SURF4 | ENSG00000148248 | surfeit 4 [Source:HGNC Symbol;Acc:HGNC:11476] |
| ENSSSAG00000047797 | ANXA6 | ENSG00000197043 | annexin A6 [Source:HGNC Symbol;Acc:HGNC:544] |
| ENSSSAG00000092486 | UBE2L3 | ENSG00000185651 | ubiquitin conjugating enzyme E2 L3 [Source:HGNC Symbol;Acc:HGNC:12488] |
| ENSSSAG00000004995 | PKN1 | ENSG00000123143 | protein kinase N1 [Source:HGNC Symbol;Acc:HGNC:9405] |
| ENSSSAG00000076429 | GRN | ENSG00000030582 | granulin precursor [Source:HGNC Symbol;Acc:HGNC:4601] |
| ENSSSAG00000060487 | CD81 | ENSG00000110651 | CD81 molecule [Source:HGNC Symbol;Acc:HGNC:1701] |
| ENSSSAG00000055328 | CREB3L2 | ENSG00000182158 | cAMP responsive element binding protein 3 like 2 [Source:HGNC Symbol;Acc:HGNC:23720] |
| ENSSSAG00000077303 | PSMD2 | ENSG00000175166 | proteasome 26S subunit ubiquitin receptor, non-ATPase 2 [Source:HGNC Symbol;Acc:HGNC:9559] |
| ENSSSAG00000042378 | UBA7 | ENSG00000182179 | ubiquitin like modifier activating enzyme 7 [Source:HGNC Symbol;Acc:HGNC:12471] |
| ENSSSAG00000034990 | PSMA3 | ENSG00000100567 | proteasome 20S subunit alpha 3 [Source:HGNC Symbol;Acc:HGNC:9532] |
GATA3eomes_intersection_ora_downreg_4wpc <-
enrichGO(
gene = gata3EOMES_intersection_down_orthologs_tbl_4wpc$hsapiens_ensembl,
OrgDb = 'org.Hs.eg.db',
keyType = 'ENSEMBL',
ont = 'BP',
minGSSize = 10,
maxGSSize = 1000,
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
readable = T
)From an input of 244 salmon ENSEMBL gene IDs exclusive to EOMES, 181 genes were converted to human orthologs.
These 184 genes were used for over-representation analysis Figure 10, and filtered by immune-related term in Figure 11.
as_tibble(GATA3eomes_intersection_ora_downreg_4wpc) %>%
dplyr::slice(1:20) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'ID' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Reds',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(GATA3eomes_intersection_ora_downreg_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:20) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| GO Term | Description |
|---|---|
| GO:0006897 | endocytosis |
| GO:2001233 | regulation of apoptotic signaling pathway |
| GO:0035966 | response to topologically incorrect protein |
| GO:0097190 | apoptotic signaling pathway |
| GO:0006909 | phagocytosis |
| GO:1903131 | mononuclear cell differentiation |
| GO:0006986 | response to unfolded protein |
| GO:0140467 | integrated stress response signaling |
| GO:0010498 | proteasomal protein catabolic process |
| GO:0002573 | myeloid leukocyte differentiation |
| GO:0043065 | positive regulation of apoptotic process |
| GO:0002521 | leukocyte differentiation |
| GO:0006457 | protein folding |
| GO:0043068 | positive regulation of programmed cell death |
| GO:0030099 | myeloid cell differentiation |
| GO:0051604 | protein maturation |
| GO:0006511 | ubiquitin-dependent protein catabolic process |
| GO:0043632 | modification-dependent macromolecule catabolic process |
| GO:0019941 | modification-dependent protein catabolic process |
| GO:0051603 | proteolysis involved in protein catabolic process |
downregulated_immune_GATA3eomes_4wpc <-
filter_rows_by_GO_term(GATA3eomes_intersection_ora_downreg_4wpc, immune_related_GOterms, 'goterms')
as_tibble(downregulated_immune_GATA3eomes_4wpc) %>%
dplyr::slice(1:10) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'Description' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Purples',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(downregulated_immune_GATA3eomes_4wpc) %>%
dplyr::slice(1:10) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| GO Term | Description |
|---|---|
| GO:0001767 | establishment of lymphocyte polarity |
| GO:0042110 | T cell activation |
| GO:0071216 | cellular response to biotic stimulus |
| GO:0097190 | apoptotic signaling pathway |
| GO:0006909 | phagocytosis |
| GO:1903131 | mononuclear cell differentiation |
| GO:0002573 | myeloid leukocyte differentiation |
| GO:0043065 | positive regulation of apoptotic process |
| GO:0002521 | leukocyte differentiation |
| GO:0030099 | myeloid cell differentiation |
gata3_eomes_dnavaccine_intersection <- setdiff(Reduce(
intersect,
list(gata3_4wpc_down, eomes_4wpc_down, dnavaccine_4wpc_down)
),
union(ivhd_4wpc_down, ivld_4wpc_down))
gata3_eomes_dnavaccine_intersection_orthologs <- gorth(
query = gata3_eomes_dnavaccine_intersection,
source_organism = 'ssalar',
target_organism = 'hsapiens',
mthreshold = 1,
filter_na = F
)
# head(gata3EOMES_down_orthologs_4wpc)
# nrow(gata3EOMES_down_orthologs_4wpc)
gata3_eomes_dnavaccine_intersection_tbl_orthologs <-
as_tibble(gata3_eomes_dnavaccine_intersection_orthologs) %>% dplyr::select(.,
input,
ortholog_name,
ortholog_ensg,
description) %>% dplyr::rename(
.,
ssalar_ensembl = input,
hsapiens_ortholog = ortholog_name,
hsapiens_ensembl = ortholog_ensg,
description = description
)
gata3_eomes_dnavaccine_intersection_tbl_orthologs %>% head(., n = 20) %>%
kable(
booktabs = TRUE,
col.names = c(
'Salmon ENSEMBL',
'Human ortholog',
'Human ENSEMBL',
'Description'
),
align = 'c') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)GATA3eomesDNAVACCINE_intersection_ora_downreg_4wpc <-
enrichGO(
gene = gata3_eomes_dnavaccine_intersection_tbl_orthologs$hsapiens_ensembl,
OrgDb = 'org.Hs.eg.db',
keyType = 'ENSEMBL',
ont = 'BP',
minGSSize = 10,
maxGSSize = 1000,
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
readable = T
)The 145 salmon ENSEMBL gene IDs from the intersection between GATA3, EOMES, and DNA vaccine, were converted to 110 human orthologs.
These 112 genes were used for over-representation analysis Figure 12, and filtered by immune-related term in Figure 13.
as_tibble(GATA3eomesDNAVACCINE_intersection_ora_downreg_4wpc) %>%
dplyr::slice(1:20) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'Description' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Reds',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(GATA3eomesDNAVACCINE_intersection_ora_downreg_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:20) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)downregulated_immune_GATA3eomesDNAVACCINE_4wpc <-
filter_rows_by_GO_term(GATA3eomesDNAVACCINE_intersection_ora_downreg_4wpc, immune_related_GOterms, 'goterms')
as_tibble(downregulated_immune_GATA3eomesDNAVACCINE_4wpc) %>%
dplyr::slice(1:10) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'Description' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Purples',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(downregulated_immune_GATA3eomesDNAVACCINE_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:10) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)intersectionGEDI <- Reduce(intersect, list(gata3_4wpc_down, eomes_4wpc_down, dnavaccine_4wpc_down, ivhd_4wpc_down))
gata3_eomes_dnavaccine_ivhd_intersection <- setdiff(intersectionGEDI, ivld_4wpc_down)
gata3_eomes_dnavaccine_ivhd_intersection_orthologs <- gorth(
query = gata3_eomes_dnavaccine_ivhd_intersection,
source_organism = 'ssalar',
target_organism = 'hsapiens',
mthreshold = 1,
filter_na = F
)
gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs <-
as_tibble(gata3_eomes_dnavaccine_ivhd_intersection_orthologs) %>% dplyr::select(.,
input,
ortholog_name,
ortholog_ensg,
description) %>% dplyr::rename(
.,
ssalar_ensembl = input,
hsapiens_ortholog = ortholog_name,
hsapiens_ensembl = ortholog_ensg,
description = description
)
gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs %>% head(., n = 20) %>%
kable(
booktabs = TRUE,
col.names = c(
'Salmon ENSEMBL',
'Human ortholog',
'Human ENSEMBL',
'Description'
),
align = 'c') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)GATA3eomesDNAVACCINEivhd_intersection_ora_downreg_4wpc <-
enrichGO(
gene = gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs$hsapiens_ensembl,
OrgDb = 'org.Hs.eg.db',
keyType = 'ENSEMBL',
ont = 'BP',
minGSSize = 10,
maxGSSize = 1000,
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
readable = T
)The 179 salmon ENSEMBL gene IDs from the intersection between GATA3, EOMES, DNA vaccine, and IV-HD were converted to 144 human orthologs.
These 142 genes were used for over-representation analysis Figure 14, and filtered by immune-related term in Figure 15.
as_tibble(GATA3eomesDNAVACCINEivhd_intersection_ora_downreg_4wpc) %>%
dplyr::slice(1:20) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'ID' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Reds',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(GATA3eomesDNAVACCINEivhd_intersection_ora_downreg_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:20) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)downregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc <-
filter_rows_by_GO_term(GATA3eomesDNAVACCINEivhd_intersection_ora_downreg_4wpc, immune_related_GOterms, 'goterms')
as_tibble(downregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc) %>%
dplyr::slice(1:10) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'Description' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Purples',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(downregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:10) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)# create list of upregulated geneIDs
a <- list(
A = significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC > 0, ]$ID,
B = significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC > 0, ]$ID,
C = significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC > 0, ]$ID,
D = significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC > 0, ]$ID,
E = significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC > 0, ]$ID
)
# add treatment names
names(a) <-
c('DNA vaccine', 'EOMES', 'GATA3', 'IVHD', 'IVLD')
# check gene counts per treatment
kable((sapply(a, length)), col.names = c('count'))| count | |
|---|---|
| DNA vaccine | 685 |
| EOMES | 1018 |
| GATA3 | 1138 |
| IVHD | 595 |
| IVLD | 248 |
# remove treatments with low counts from the list
# a$IVLD <- NULL# plot Venn diagram
display_venn(
a,
fill = c('#cdb4db', '#bde0fe', '#ccd5ae', '#d4a373', '#f08080'),
lwd = 1,
cex = 1,
cat.cex = 1,
cat.fontfamily = 'serif',
# cat.fontface = 'bold',
cat.default.pos = 'outer',
cat.dist = c(0.20, 0.20, 0.22, 0.20,.20),
cat.pos = c(360, 360, 250, 90, 360))From the Venn diagram, I gathered the genes that are common to all treatments, and converted them to h. sapiens orthologs. They are listed on Table 19.
I also did over-representation analysis on the following gene lists:
EOMES (255 genes)
GATA3 (346 genes)
EOMES-GATA3 intersection (174 genes)
GATA3-DNA vaccine-IVHD intersection (174 genes)
common_genes_upregulated_4WPC <-
Reduce(
intersect,
list(
significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC > 0, ]$ID,
significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC > 0, ]$ID,
significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC > 0, ]$ID,
significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC > 0, ]$ID,
significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC > 0, ]$ID
)
)
# length(common_genes_upregulated_4WPC)
common_upregulated_orthologs_4wpc <- gorth(
query = common_genes_upregulated_4WPC,
source_organism = 'ssalar',
target_organism = 'hsapiens',
mthreshold = 1,
filter_na = F
)
common_upregulated_orthologs_tbl_4wpc <-
as_tibble(common_upregulated_orthologs_4wpc) %>% dplyr::select(.,
input,
ortholog_name,
ortholog_ensg,
description) %>% dplyr::rename(
.,
ssalar_ensembl = input,
hsapiens_ortholog = ortholog_name,
hsapiens_ensembl = ortholog_ensg,
description = description
)
common_upregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>%
kable(
booktabs = TRUE,
col.names = c(
'Salmon ENSEMBL',
'Human ortholog',
'Human ENSEMBL',
'Description'
),
align = 'c',
) %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| Salmon ENSEMBL | Human ortholog | Human ENSEMBL | Description |
|---|---|---|---|
| ENSSSAG00000064243 | GFI1B | ENSG00000165702 | growth factor independent 1B transcriptional repressor [Source:HGNC Symbol;Acc:HGNC:4238] |
| ENSSSAG00000055575 | N/A | N/A | N/A |
| ENSSSAG00000084117 | TNFRSF10A | ENSG00000104689 | TNF receptor superfamily member 10a [Source:HGNC Symbol;Acc:HGNC:11904] |
| ENSSSAG00000044296 | TAL1 | ENSG00000162367 | TAL bHLH transcription factor 1, erythroid differentiation factor [Source:HGNC Symbol;Acc:HGNC:11556] |
| ENSSSAG00000087439 | HBG2 | ENSG00000196565 | hemoglobin subunit gamma 2 [Source:HGNC Symbol;Acc:HGNC:4832] |
| ENSSSAG00000074325 | PPFIA4 | ENSG00000143847 | PTPRF interacting protein alpha 4 [Source:HGNC Symbol;Acc:HGNC:9248] |
| ENSSSAG00000057121 | ENPP2 | ENSG00000136960 | ectonucleotide pyrophosphatase/phosphodiesterase 2 [Source:HGNC Symbol;Acc:HGNC:3357] |
| ENSSSAG00000045065 | HBE1 | ENSG00000213931 | hemoglobin subunit epsilon 1 [Source:HGNC Symbol;Acc:HGNC:4830] |
| ENSSSAG00000046626 | CAT | ENSG00000121691 | catalase [Source:HGNC Symbol;Acc:HGNC:1516] |
| ENSSSAG00000000221 | AQP1 | ENSG00000240583 | aquaporin 1 (Colton blood group) [Source:HGNC Symbol;Acc:HGNC:633] |
| ENSSSAG00000103747 | HBE1 | ENSG00000213931 | hemoglobin subunit epsilon 1 [Source:HGNC Symbol;Acc:HGNC:4830] |
| ENSSSAG00000044737 | HBG2 | ENSG00000196565 | hemoglobin subunit gamma 2 [Source:HGNC Symbol;Acc:HGNC:4832] |
| ENSSSAG00000118282 | TWF2 | ENSG00000247596 | twinfilin actin binding protein 2 [Source:HGNC Symbol;Acc:HGNC:9621] |
| ENSSSAG00000106259 | N/A | N/A | N/A |
| ENSSSAG00000006088 | BNIP3 | ENSG00000176171 | BCL2 interacting protein 3 [Source:HGNC Symbol;Acc:HGNC:1084] |
| ENSSSAG00000065233 | HBE1 | ENSG00000213931 | hemoglobin subunit epsilon 1 [Source:HGNC Symbol;Acc:HGNC:4830] |
| ENSSSAG00000105188 | SOX4 | ENSG00000124766 | SRY-box transcription factor 4 [Source:HGNC Symbol;Acc:HGNC:11200] |
| ENSSSAG00000088104 | DBP | ENSG00000105516 | D-box binding PAR bZIP transcription factor [Source:HGNC Symbol;Acc:HGNC:2697] |
| ENSSSAG00000089742 | N/A | N/A | N/A |
| ENSSSAG00000105062 | IGFBP3 | ENSG00000146674 | insulin like growth factor binding protein 3 [Source:HGNC Symbol;Acc:HGNC:5472] |
From an input of 168 salmon ENSEMBL gene IDs, 136 genes were converted to human orthologs.
common_genes_ora_downreg_4wpc <-
enrichGO(
gene = common_upregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
OrgDb = 'org.Hs.eg.db',
keyType = 'ENSEMBL',
ont = 'BP',
minGSSize = 10,
maxGSSize = 1000,
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
readable = T
)as_tibble(common_genes_ora_downreg_4wpc) %>%
dplyr::slice(1:20) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'Description' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3,
) +
coord_flip() +
scale_fill_distiller(palette = 'Reds',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(common_genes_ora_downreg_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:20) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| GO Term | Description |
|---|---|
| GO:0009142 | nucleoside triphosphate biosynthetic process |
| GO:0072522 | purine-containing compound biosynthetic process |
| GO:0009201 | ribonucleoside triphosphate biosynthetic process |
| GO:0006164 | purine nucleotide biosynthetic process |
| GO:0072521 | purine-containing compound metabolic process |
| GO:1901293 | nucleoside phosphate biosynthetic process |
| GO:0009165 | nucleotide biosynthetic process |
| GO:0006163 | purine nucleotide metabolic process |
| GO:0006753 | nucleoside phosphate metabolic process |
| GO:0009117 | nucleotide metabolic process |
| GO:0009152 | purine ribonucleotide biosynthetic process |
| GO:0006091 | generation of precursor metabolites and energy |
| GO:0061061 | muscle structure development |
| GO:0055086 | nucleobase-containing small molecule metabolic process |
| GO:0046390 | ribose phosphate biosynthetic process |
| GO:0009260 | ribonucleotide biosynthetic process |
| GO:0006119 | oxidative phosphorylation |
| GO:0015980 | energy derivation by oxidation of organic compounds |
| GO:0045333 | cellular respiration |
| GO:0009060 | aerobic respiration |
The upregulated genes common to all treatments enriched a total of 186 pathways, but only the top 20 by adjusted p-value are displayed in Figure 17.
dnavaccine_4wpc_up <-
significant_res_dnavaccine_vs_conu_4wpc[significant_res_dnavaccine_vs_conu_4wpc$log2FC > 0,]$ID
eomes_4wpc_up <-
significant_res_eomes_vs_conu_4wpc[significant_res_eomes_vs_conu_4wpc$log2FC > 0,]$ID
gata3_4wpc_up <-
significant_res_gata3_vs_conu_4wpc[significant_res_gata3_vs_conu_4wpc$log2FC > 0,]$ID
ivhd_4wpc_up <-
significant_res_ivhd_vs_conu_4wpc[significant_res_ivhd_vs_conu_4wpc$log2FC > 0,]$ID
ivld_4wpc_up <-
significant_res_ivld_vs_conu_4wpc[significant_res_ivld_vs_conu_4wpc$log2FC > 0,]$ID
gata3_exclusive_up_genes_4wpc <-
setdiff(gata3_4wpc_up,
c(dnavaccine_4wpc_up, eomes_4wpc_up, ivhd_4wpc_up, ivld_4wpc_up))
# length(gata3_exclusive_up_genes_4wpc)
gata3_upregulated_orthologs_4wpc <- gorth(
query = gata3_exclusive_up_genes_4wpc,
source_organism = 'ssalar',
target_organism = 'hsapiens',
mthreshold = 1,
filter_na = F
)
gata3_upregulated_orthologs_tbl_4wpc <-
as_tibble(gata3_upregulated_orthologs_4wpc) %>% dplyr::select(.,
input,
ortholog_name,
ortholog_ensg,
description) %>% dplyr::rename(
.,
ssalar_ensembl = input,
hsapiens_ortholog = ortholog_name,
hsapiens_ensembl = ortholog_ensg,
description = description
)
gata3_upregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>%
kable(
booktabs = TRUE,
col.names = c(
'Salmon ENSEMBL',
'Human ortholog',
'Human ENSEMBL',
'Description'
),
align = 'c') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| Salmon ENSEMBL | Human ortholog | Human ENSEMBL | Description |
|---|---|---|---|
| ENSSSAG00000114656 | COL10A1 | ENSG00000123500 | collagen type X alpha 1 chain [Source:HGNC Symbol;Acc:HGNC:2185] |
| ENSSSAG00000105668 | KLF17 | ENSG00000171872 | KLF transcription factor 17 [Source:HGNC Symbol;Acc:HGNC:18830] |
| ENSSSAG00000047878 | PAPLN | ENSG00000100767 | papilin, proteoglycan like sulfated glycoprotein [Source:HGNC Symbol;Acc:HGNC:19262] |
| ENSSSAG00000044874 | PRSS16 | ENSG00000112812 | serine protease 16 [Source:HGNC Symbol;Acc:HGNC:9480] |
| ENSSSAG00000114639 | N/A | N/A | N/A |
| ENSSSAG00000107243 | ABCG4 | ENSG00000172350 | ATP binding cassette subfamily G member 4 [Source:HGNC Symbol;Acc:HGNC:13884] |
| ENSSSAG00000084306 | CNGA2 | ENSG00000183862 | cyclic nucleotide gated channel subunit alpha 2 [Source:HGNC Symbol;Acc:HGNC:2149] |
| ENSSSAG00000109302 | FAM174B | ENSG00000185442 | family with sequence similarity 174 member B [Source:HGNC Symbol;Acc:HGNC:34339] |
| ENSSSAG00000064220 | PADI2 | ENSG00000117115 | peptidyl arginine deiminase 2 [Source:HGNC Symbol;Acc:HGNC:18341] |
| ENSSSAG00000106865 | ART4 | ENSG00000111339 | ADP-ribosyltransferase 4 (inactive) (Dombrock blood group) [Source:HGNC Symbol;Acc:HGNC:726] |
| ENSSSAG00000084240 | TAL1 | ENSG00000162367 | TAL bHLH transcription factor 1, erythroid differentiation factor [Source:HGNC Symbol;Acc:HGNC:11556] |
| ENSSSAG00000089308 | N/A | N/A | N/A |
| ENSSSAG00000015164 | NRP1 | ENSG00000099250 | neuropilin 1 [Source:HGNC Symbol;Acc:HGNC:8004] |
| ENSSSAG00000080590 | KLF17 | ENSG00000171872 | KLF transcription factor 17 [Source:HGNC Symbol;Acc:HGNC:18830] |
| ENSSSAG00000105530 | N/A | N/A | N/A |
| ENSSSAG00000006070 | SEPTIN3 | ENSG00000100167 | septin 3 [Source:HGNC Symbol;Acc:HGNC:10750] |
| ENSSSAG00000108653 | UFSP1 | ENSG00000176125 | UFM1 specific peptidase 1 (inactive) [Source:HGNC Symbol;Acc:HGNC:33821] |
| ENSSSAG00000007672 | RTKN | ENSG00000114993 | rhotekin [Source:HGNC Symbol;Acc:HGNC:10466] |
| ENSSSAG00000005588 | RASSF9 | ENSG00000198774 | Ras association domain family member 9 [Source:HGNC Symbol;Acc:HGNC:15739] |
| ENSSSAG00000086990 | N/A | N/A | N/A |
349 genes were upregulated exclusively in GATA3 and 258 were converted to human orthologs.
These 256 genes were used for over-representation analysis Figure 18. Only 4 pathways were enriched in these upregulated genes. None of them was immune related.
gata3_ora_upreg_4wpc <-
enrichGO(
gene = gata3_upregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
OrgDb = 'org.Hs.eg.db',
keyType = 'ENSEMBL',
ont = 'BP',
minGSSize = 10,
maxGSSize = 1000,
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
readable = T
)as_tibble(gata3_ora_upreg_4wpc) %>%
dplyr::slice(1:20) %>%
mutate(Description = fct_reorder(Description, p.adjust)) %>% # sorting 'Description' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3,
) +
coord_flip() +
scale_fill_distiller(palette = 'Reds',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(gata3_ora_upreg_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:20) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| GO Term | Description |
|---|---|
| GO:0043043 | peptide biosynthetic process |
| GO:0006412 | translation |
| GO:0002181 | cytoplasmic translation |
eomes_exclusive_up_genes_4wpc <-
setdiff(eomes_4wpc_up,
c(dnavaccine_4wpc_up, gata3_4wpc_up, ivhd_4wpc_up, ivld_4wpc_up))
# length(eomes_exclusive_up_genes_4wpc)
eomes_upregulated_orthologs_4wpc <- gorth(
query = eomes_exclusive_up_genes_4wpc,
source_organism = 'ssalar',
target_organism = 'hsapiens',
mthreshold = 1,
filter_na = F
)
eomes_upregulated_orthologs_tbl_4wpc <-
as_tibble(eomes_upregulated_orthologs_4wpc) %>% dplyr::select(.,
input,
ortholog_name,
ortholog_ensg,
description) %>% dplyr::rename(
.,
ssalar_ensembl = input,
hsapiens_ortholog = ortholog_name,
hsapiens_ensembl = ortholog_ensg,
description = description
)
eomes_upregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>%
kable(
booktabs = TRUE,
col.names = c(
'Salmon ENSEMBL',
'Human ortholog',
'Human ENSEMBL',
'Description'
),
align = 'c') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| Salmon ENSEMBL | Human ortholog | Human ENSEMBL | Description |
|---|---|---|---|
| ENSSSAG00000044078 | PAPPA2 | ENSG00000116183 | pappalysin 2 [Source:HGNC Symbol;Acc:HGNC:14615] |
| ENSSSAG00000116632 | GFRA2 | ENSG00000168546 | GDNF family receptor alpha 2 [Source:HGNC Symbol;Acc:HGNC:4244] |
| ENSSSAG00000093513 | N/A | N/A | N/A |
| ENSSSAG00000056038 | GLRA1 | ENSG00000145888 | glycine receptor alpha 1 [Source:HGNC Symbol;Acc:HGNC:4326] |
| ENSSSAG00000078714 | OSBPL1A | ENSG00000141447 | oxysterol binding protein like 1A [Source:HGNC Symbol;Acc:HGNC:16398] |
| ENSSSAG00000101638 | N/A | N/A | N/A |
| ENSSSAG00000115817 | N/A | N/A | N/A |
| ENSSSAG00000085775 | N/A | N/A | N/A |
| ENSSSAG00000096367 | N/A | N/A | N/A |
| ENSSSAG00000100368 | N/A | N/A | N/A |
| ENSSSAG00000075250 | N/A | N/A | N/A |
| ENSSSAG00000002605 | XPNPEP3 | ENSG00000196236 | X-prolyl aminopeptidase 3 [Source:HGNC Symbol;Acc:HGNC:28052] |
| ENSSSAG00000088168 | N/A | N/A | N/A |
| ENSSSAG00000042950 | FAM20A | ENSG00000108950 | FAM20A golgi associated secretory pathway pseudokinase [Source:HGNC Symbol;Acc:HGNC:23015] |
| ENSSSAG00000105856 | N/A | N/A | N/A |
| ENSSSAG00000068404 | PI4KB | ENSG00000143393 | phosphatidylinositol 4-kinase beta [Source:HGNC Symbol;Acc:HGNC:8984] |
| ENSSSAG00000006897 | SLC45A2 | ENSG00000164175 | solute carrier family 45 member 2 [Source:HGNC Symbol;Acc:HGNC:16472] |
| ENSSSAG00000000688 | PITPNC1 | ENSG00000154217 | phosphatidylinositol transfer protein cytoplasmic 1 [Source:HGNC Symbol;Acc:HGNC:21045] |
| ENSSSAG00000070217 | MYH7 | ENSG00000092054 | myosin heavy chain 7 [Source:HGNC Symbol;Acc:HGNC:7577] |
| ENSSSAG00000038530 | N/A | N/A | N/A |
eomes_ora_upreg_4wpc <-
enrichGO(
gene = eomes_upregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
OrgDb = 'org.Hs.eg.db',
keyType = 'ENSEMBL',
ont = 'BP',
minGSSize = 10,
maxGSSize = 1000,
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
readable = T
)as_tibble(eomes_ora_upreg_4wpc) %>%
dplyr::slice(1:20) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'ID' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3,
) +
coord_flip() +
scale_fill_distiller(palette = 'Reds',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)Out of 255 (upregulated) genes used in over-representation analysis, 0 pathways were over-represented.
intersectionGE <- intersect(gata3_4wpc_up, eomes_4wpc_up)
# length(intersection2)
gata3EOMES_exclusive_genes_up <- setdiff(intersectionGE,
c(dnavaccine_4wpc_up, ivhd_4wpc_up, ivld_4wpc_up))
# length(gata3EOMES_exclusive_genes_up)
gata3EOMES_upregulated_orthologs_4wpc <- gorth(
query = gata3EOMES_exclusive_genes_up,
source_organism = 'ssalar',
target_organism = 'hsapiens',
mthreshold = 1,
filter_na = F
)
gata3EOMES_upregulated_orthologs_tbl_4wpc <-
as_tibble(gata3EOMES_upregulated_orthologs_4wpc) %>% dplyr::select(.,
input,
ortholog_name,
ortholog_ensg,
description) %>% dplyr::rename(
.,
ssalar_ensembl = input,
hsapiens_ortholog = ortholog_name,
hsapiens_ensembl = ortholog_ensg,
description = description
)
gata3EOMES_upregulated_orthologs_tbl_4wpc %>% head(., n = 20) %>%
kable(
booktabs = TRUE,
col.names = c(
'Salmon ENSEMBL',
'Human ortholog',
'Human ENSEMBL',
'Description'
),
align = 'c') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| Salmon ENSEMBL | Human ortholog | Human ENSEMBL | Description |
|---|---|---|---|
| ENSSSAG00000045171 | IGFBP7 | ENSG00000163453 | insulin like growth factor binding protein 7 [Source:HGNC Symbol;Acc:HGNC:5476] |
| ENSSSAG00000010340 | COL6A2 | ENSG00000142173 | collagen type VI alpha 2 chain [Source:HGNC Symbol;Acc:HGNC:2212] |
| ENSSSAG00000085630 | N/A | N/A | N/A |
| ENSSSAG00000079527 | NIBAN2 | ENSG00000136830 | niban apoptosis regulator 2 [Source:HGNC Symbol;Acc:HGNC:25282] |
| ENSSSAG00000121679 | N/A | N/A | N/A |
| ENSSSAG00000070347 | MOBP | ENSG00000168314 | myelin associated oligodendrocyte basic protein [Source:HGNC Symbol;Acc:HGNC:7189] |
| ENSSSAG00000086945 | PPDPF | ENSG00000125534 | pancreatic progenitor cell differentiation and proliferation factor [Source:HGNC Symbol;Acc:HGNC:16142] |
| ENSSSAG00000068629 | NMT1 | ENSG00000136448 | N-myristoyltransferase 1 [Source:HGNC Symbol;Acc:HGNC:7857] |
| ENSSSAG00000056040 | RHAG | ENSG00000112077 | Rh associated glycoprotein [Source:HGNC Symbol;Acc:HGNC:10006] |
| ENSSSAG00000089701 | N/A | N/A | N/A |
| ENSSSAG00000086795 | FBXO32 | ENSG00000156804 | F-box protein 32 [Source:HGNC Symbol;Acc:HGNC:16731] |
| ENSSSAG00000100606 | N/A | N/A | N/A |
| ENSSSAG00000094473 | N/A | N/A | N/A |
| ENSSSAG00000094946 | TSSC4 | ENSG00000184281 | tumor suppressing subtransferable candidate 4 [Source:HGNC Symbol;Acc:HGNC:12386] |
| ENSSSAG00000047323 | MYBPC1 | ENSG00000196091 | myosin binding protein C1 [Source:HGNC Symbol;Acc:HGNC:7549] |
| ENSSSAG00000089276 | RGCC | ENSG00000102760 | regulator of cell cycle [Source:HGNC Symbol;Acc:HGNC:20369] |
| ENSSSAG00000039832 | IRS2 | ENSG00000185950 | insulin receptor substrate 2 [Source:HGNC Symbol;Acc:HGNC:6126] |
| ENSSSAG00000096709 | HDAC9 | ENSG00000048052 | histone deacetylase 9 [Source:HGNC Symbol;Acc:HGNC:14065] |
| ENSSSAG00000008223 | FNDC1 | ENSG00000164694 | fibronectin type III domain containing 1 [Source:HGNC Symbol;Acc:HGNC:21184] |
| ENSSSAG00000042825 | JAK2 | ENSG00000096968 | Janus kinase 2 [Source:HGNC Symbol;Acc:HGNC:6192] |
gata3EOMES_ora_upreg_4wpc <-
enrichGO(
gene = gata3EOMES_upregulated_orthologs_tbl_4wpc$hsapiens_ensembl,
OrgDb = 'org.Hs.eg.db',
keyType = 'ENSEMBL',
ont = 'BP',
minGSSize = 10,
maxGSSize = 1000,
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
readable = T
)as_tibble(gata3EOMES_ora_upreg_4wpc) %>%
dplyr::slice(1:20) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'ID' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3,
) +
coord_flip() +
scale_fill_distiller(palette = 'Reds',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(gata3EOMES_ora_upreg_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:20) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)| GO Term | Description |
|---|---|
| GO:0010927 | cellular component assembly involved in morphogenesis |
| GO:0055002 | striated muscle cell development |
| GO:0030239 | myofibril assembly |
| GO:0022613 | ribonucleoprotein complex biogenesis |
| GO:0051100 | negative regulation of binding |
| GO:0055001 | muscle cell development |
| GO:0042254 | ribosome biogenesis |
| GO:0045214 | sarcomere organization |
| GO:0051098 | regulation of binding |
| GO:0051101 | regulation of DNA binding |
The 171 orthologs upregulated in the EOMES-GATA3 intersection were over-represented in only 10 pathways, none of them immune-related.
intersectionGEDI_up <- Reduce(intersect, list(gata3_4wpc_up, eomes_4wpc_up, dnavaccine_4wpc_up, ivhd_4wpc_up))
gata3_eomes_dnavaccine_ivhd_intersection <- setdiff(intersectionGEDI, ivld_4wpc_up)
gata3_eomes_dnavaccine_ivhd_intersection_orthologs <- gorth(
query = gata3_eomes_dnavaccine_ivhd_intersection,
source_organism = 'ssalar',
target_organism = 'hsapiens',
mthreshold = 1,
filter_na = F
)
gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs <-
as_tibble(gata3_eomes_dnavaccine_ivhd_intersection_orthologs) %>% dplyr::select(.,
input,
ortholog_name,
ortholog_ensg,
description) %>% dplyr::rename(
.,
ssalar_ensembl = input,
hsapiens_ortholog = ortholog_name,
hsapiens_ensembl = ortholog_ensg,
description = description
)
gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs %>% head(., n = 20) %>%
kable(
booktabs = TRUE,
col.names = c(
'Salmon ENSEMBL',
'Human ortholog',
'Human ENSEMBL',
'Description'
),
align = 'c') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)GATA3eomesDNAVACCINEivhd_intersection_ora_upreg_4wpc <-
enrichGO(
gene = gata3_eomes_dnavaccine_ivhd_intersection_tbl_orthologs$hsapiens_ensembl,
OrgDb = 'org.Hs.eg.db',
keyType = 'ENSEMBL',
ont = 'BP',
minGSSize = 10,
maxGSSize = 1000,
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
readable = T
)The 334 salmon ENSEMBL gene IDs from the intersection between GATA3, EOMES, DNA vaccine, and IV-HD were converted to 268 human orthologs. These 268 genes were used for over-representation analysis Figure 20, and filtered by immune-related term in Figure 21.
as_tibble(GATA3eomesDNAVACCINEivhd_intersection_ora_upreg_4wpc) %>%
dplyr::slice(1:20) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'ID' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Reds',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(GATA3eomesDNAVACCINEivhd_intersection_ora_upreg_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:20) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)upregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc <-
filter_rows_by_GO_term(GATA3eomesDNAVACCINEivhd_intersection_ora_upreg_4wpc, immune_related_GOterms, 'goterms')
as_tibble(upregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc) %>%
dplyr::slice(1:10) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>% # sorting 'ID' by Adjusted p-value
ggplot(aes(x = ID, y = Count, fill = p.adjust)) +
geom_bar(stat = "identity",
colour = 'black',
linewidth = 0.3) +
coord_flip() +
scale_fill_distiller(palette = 'Purples',
name = 'Adjusted \n p-value',
guide = guide_colorbar(reverse = TRUE)) +
scale_y_continuous() +
guides(fill = guide_colourbar(barwidth = 1, barheight = 7)) +
labs(x = '', y = 'Gene count', fill = 'Adjusted p-value') +
theme_classic() +
theme(axis.text.y = element_text(
color = "black",
size = 14,
face = "plain"
)) +
theme(legend.position = 'right') +
theme(axis.title = element_text()) +
theme(
text = element_text(family = 'serif', size = 14),
plot.margin = margin(
t = 5,
r = 0,
l = 0,
b = 0
),
plot.title = element_text(hjust = 0)
)as_tibble(upregulated_immune_GATA3eomesDNAVACCINEivhd_4wpc) %>%
mutate(ID = fct_reorder(ID, p.adjust)) %>%
dplyr::slice(1:10) %>%
dplyr::select(ID, Description) %>%
map_df(., rev) %>%
kable(
booktabs = TRUE,
col.names = c(
'GO Term',
'Description'
),
align = 'l') %>%
kableExtra::row_spec(., row = 0, italic = TRUE) %>%
kableExtra::kable_styling(font_size = 12)The number of enriched (over-represented) pathways was 631, of which 196 were immune-related pathways.
Besides running over-representation analysis, I also tried to do Gene Set Enrichment Analysis (GSEA). While ORA is based on the overlap of gene sets, GSEA considers the distribution of gene expression changes in a dataset. In this case, I started from genes that were commonly expressed in all treatments in relation to CONU. The sum of the center of both Venn diagrams, since GSEA uses down- and upregulated at the same time, as input. These were converted to orthologs, and because salmon ENSEMBL ID to human ortholog conversion yields repeated gene symbols, I had to select one gene ‘version’ per treatment (the one with the lowest adjusted p-value)
# Loop through each treatment and apply the function, also adding a column with treatment information
# Vertically merge data frames from all treatments and remove NA
merged_df <- do.call(rbind, result_list) %>% na.omit()
# Reduce and intersect the ID column in result_list to find common differentially regulated genes among all treatments
reduced_intersected_ids <-
Reduce(intersect, lapply(result_list, function(df)
df$ID))
# Find the common IDs between reduced_intersected_ids and merged_df. This way we are keeping only the common regulated salmon genes
common_ids <- intersect(merged_df$ID, reduced_intersected_ids)
# Subset merged_df to keep only rows with common IDs
# merged_subset <- merged_df[merged_df$ID %in% common_ids,]
merged_subset <-
subset(merged_df, ID %in% common_ids) # tidy version
# Splitting between up and downregulated genes, and sorting by gene name and adjusted p-value
upregulated_gsea <-
merged_subset %>% dplyr::filter(log2FC > 0) %>% arrange(ortholog_name, adjusted_p.val)
downregulated_gsea <-
merged_subset %>% dplyr::filter(log2FC < 0) %>% arrange(ortholog_name, adjusted_p.val)
# Keeping just one of the ortholog copies, the one with the lowest adjusted p-value
unique_upregulated <-
upregulated_gsea[!duplicated(upregulated_gsea$ortholog_ensg),] %>% arrange(ortholog_name)
unique_downregulated <-
downregulated_gsea[!duplicated(downregulated_gsea$ortholog_ensg),] %>% arrange(ortholog_name)
# Merging down and upregulated dataframes after removing duplicates
enrichment_unique_common <-
rbind(unique_downregulated, unique_upregulated) %>% dplyr::arrange(desc(log2FC))
# Arranging matrix for GSEA
enrichment_gsea_common <- enrichment_unique_common$log2FC
names(enrichment_gsea_common) <-
enrichment_unique_common$ortholog_ensg# Running GSEA on common regulated genes
#| label: gsea-common-genes-4wpc
#| warning: false
#| cache: true
gsea_common <- gseGO(
geneList = enrichment_gsea_common,
OrgDb = org.Hs.eg.db,
keyType = 'ENSEMBL',
ont = 'BP',
pvalueCutoff = 0.05,
pAdjustMethod = 'BH',
eps = 0,
verbose = T
)edox <- setReadable(gsea_common, 'org.Hs.eg.db', 'ENSEMBL')
cnetplot(
edox,
color.params = list(foldChange = enrichment_gsea_common),
circular = T,
colorEdge = TRUE,
cex_category = 1,
cex_gene = 1,
cex_label_category = 0.8,
cex_label_gene = 0.8
)